An Advanced Introduction to R

Kazuharu Yanagimoto

January 13, 2023

Project Based Workflow

Q. Why Doesn’t Your Code Work on My Computer?

A. Conflicts in Path or Package Version

A. You don’t use here and renv

R Project

Have you ever clicked this button?


You should ALWAYS use R Project!

Why Do We Need to Use R Project?


Path Manager

Package Manager

Always Use here for Paths


The function here::here() treats the project directory as the root directory.

here::here()
[1] "/home/rstudio/workshop-r-2022"


You should always specify the path by here::here()

data <- readr::read_csv(
  here::here("data/tiny.csv")
)


It works in Windows, Mac, Linux (of course, in a Docker environment)

Remember…

If the first line of your R script is setwd("C:\Users\jenny\path\that\only\I\have")

I* will come into your office and SET YOUR COMPUTER ON FIRE 🔥.

–Jenny Bryan

renv Is Smarter than Us


  • Initialize the environment with renv::init(). It creates the renv/ directory and the renv.lock file
  • At some point, you can record your package and its version information with renv::snapshot()
  • Your collaborators can install the same packages just by running renv::restore()
renv.lock
{
  "R": {
    "Version": "4.2.2",
    "Repositories": [
      {
        "Name": "CRAN",
        "URL": "https://packagemanager.posit.co/cran/latest"
      }
    ]
  },
  "Packages": {
    "DBI": {
      "Package": "DBI",
      "Version": "1.1.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "b2866e62bab9378c3cc9476a1954226b",
      "Requirements": []
    },
    "MASS": {
      "Package": "MASS",
      "Version": "7.3-58.1",
      "Source": "Repository",
      "Repository": "CRAN",
      "Hash": "762e1804143a332333c054759f89a706",
      "Requirements": []
    },
    "Matrix": {
      "Package": "Matrix",
      "Version": "1.5-1",
      "Source": "Repository",
      "Repository": "CRAN",
      "Hash": "539dc0c0c05636812f1080f473d2c177",
      "Requirements": [
        "lattice"
      ]
    },
    "R6": {
      "Package": "R6",
      "Version": "2.5.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "470851b6d5d0ac559e9d01bb352b4021",
      "Requirements": []
    },
    "RColorBrewer": {
      "Package": "RColorBrewer",
      "Version": "1.1-3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "45f0398006e83a5b10b72a90663d8d8c",
      "Requirements": []
    },
    "askpass": {
      "Package": "askpass",
      "Version": "1.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "e8a22846fff485f0be3770c2da758713",
      "Requirements": [
        "sys"
      ]
    },
    "assertthat": {
      "Package": "assertthat",
      "Version": "0.2.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "50c838a310445e954bc13f26f26a6ecf",
      "Requirements": []
    },
    "backports": {
      "Package": "backports",
      "Version": "1.4.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c39fbec8a30d23e721980b8afb31984c",
      "Requirements": []
    },
    "base64enc": {
      "Package": "base64enc",
      "Version": "0.1-3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "543776ae6848fde2f48ff3816d0628bc",
      "Requirements": []
    },
    "bit": {
      "Package": "bit",
      "Version": "4.0.5",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "d242abec29412ce988848d0294b208fd",
      "Requirements": []
    },
    "bit64": {
      "Package": "bit64",
      "Version": "4.0.5",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "9fe98599ca456d6552421db0d6772d8f",
      "Requirements": [
        "bit"
      ]
    },
    "blob": {
      "Package": "blob",
      "Version": "1.2.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "10d231579bc9c06ab1c320618808d4ff",
      "Requirements": [
        "rlang",
        "vctrs"
      ]
    },
    "broom": {
      "Package": "broom",
      "Version": "1.0.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "1773f8d5102f9853ecd18a0d13d460fd",
      "Requirements": [
        "backports",
        "dplyr",
        "ellipsis",
        "generics",
        "glue",
        "purrr",
        "rlang",
        "stringr",
        "tibble",
        "tidyr"
      ]
    },
    "bslib": {
      "Package": "bslib",
      "Version": "0.4.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "a7fbf03946ad741129dc81098722fca1",
      "Requirements": [
        "base64enc",
        "cachem",
        "htmltools",
        "jquerylib",
        "jsonlite",
        "memoise",
        "mime",
        "rlang",
        "sass"
      ]
    },
    "cachem": {
      "Package": "cachem",
      "Version": "1.0.6",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "648c5b3d71e6a37e3043617489a0a0e9",
      "Requirements": [
        "fastmap",
        "rlang"
      ]
    },
    "callr": {
      "Package": "callr",
      "Version": "3.7.3",
      "Source": "Repository",
      "Repository": "CRAN",
      "Hash": "9b2191ede20fa29828139b9900922e51",
      "Requirements": [
        "R6",
        "processx"
      ]
    },
    "cellranger": {
      "Package": "cellranger",
      "Version": "1.1.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "f61dbaec772ccd2e17705c1e872e9e7c",
      "Requirements": [
        "rematch",
        "tibble"
      ]
    },
    "cli": {
      "Package": "cli",
      "Version": "3.5.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "eb9fc121ad9a1075c471107ef185be46",
      "Requirements": []
    },
    "clipr": {
      "Package": "clipr",
      "Version": "0.8.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "3f038e5ac7f41d4ac41ce658c85e3042",
      "Requirements": []
    },
    "colorspace": {
      "Package": "colorspace",
      "Version": "2.0-3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "bb4341986bc8b914f0f0acf2e4a3f2f7",
      "Requirements": []
    },
    "cpp11": {
      "Package": "cpp11",
      "Version": "0.4.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "ed588261931ee3be2c700d22e94a29ab",
      "Requirements": []
    },
    "crayon": {
      "Package": "crayon",
      "Version": "1.5.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "e8a1e41acf02548751f45c718d55aa6a",
      "Requirements": []
    },
    "curl": {
      "Package": "curl",
      "Version": "4.3.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "0eb86baa62f06e8855258fa5a8048667",
      "Requirements": []
    },
    "data.table": {
      "Package": "data.table",
      "Version": "1.14.6",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "aecef50008ea7b57c76f1cb5c127fb02",
      "Requirements": []
    },
    "dbplyr": {
      "Package": "dbplyr",
      "Version": "2.2.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "f6c7eb9617e4d2a86bb7182fff99c805",
      "Requirements": [
        "DBI",
        "R6",
        "assertthat",
        "blob",
        "cli",
        "dplyr",
        "glue",
        "lifecycle",
        "magrittr",
        "pillar",
        "purrr",
        "rlang",
        "tibble",
        "tidyselect",
        "vctrs",
        "withr"
      ]
    },
    "digest": {
      "Package": "digest",
      "Version": "0.6.31",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "8b708f296afd9ae69f450f9640be8990",
      "Requirements": []
    },
    "dplyr": {
      "Package": "dplyr",
      "Version": "1.0.10",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "539412282059f7f0c07295723d23f987",
      "Requirements": [
        "R6",
        "generics",
        "glue",
        "lifecycle",
        "magrittr",
        "pillar",
        "rlang",
        "tibble",
        "tidyselect",
        "vctrs"
      ]
    },
    "dtplyr": {
      "Package": "dtplyr",
      "Version": "1.2.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c5f8828a0b459a703db190b001ad4818",
      "Requirements": [
        "crayon",
        "data.table",
        "dplyr",
        "ellipsis",
        "glue",
        "lifecycle",
        "rlang",
        "tibble",
        "tidyselect",
        "vctrs"
      ]
    },
    "ellipsis": {
      "Package": "ellipsis",
      "Version": "0.3.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077",
      "Requirements": [
        "rlang"
      ]
    },
    "evaluate": {
      "Package": "evaluate",
      "Version": "0.19",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "5aac3cd0a3ccb1a738941796b28c26fe",
      "Requirements": []
    },
    "fansi": {
      "Package": "fansi",
      "Version": "1.0.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "83a8afdbe71839506baa9f90eebad7ec",
      "Requirements": []
    },
    "farver": {
      "Package": "farver",
      "Version": "2.1.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "8106d78941f34855c440ddb946b8f7a5",
      "Requirements": []
    },
    "fastmap": {
      "Package": "fastmap",
      "Version": "1.1.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "77bd60a6157420d4ffa93b27cf6a58b8",
      "Requirements": []
    },
    "forcats": {
      "Package": "forcats",
      "Version": "0.5.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "9d95bc88206321cd1bc98480ecfd74bb",
      "Requirements": [
        "cli",
        "ellipsis",
        "glue",
        "lifecycle",
        "magrittr",
        "rlang",
        "tibble",
        "withr"
      ]
    },
    "fs": {
      "Package": "fs",
      "Version": "1.5.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "7c89603d81793f0d5486d91ab1fc6f1d",
      "Requirements": []
    },
    "gargle": {
      "Package": "gargle",
      "Version": "1.2.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "cca71329ad88e21267f09255d3f008c2",
      "Requirements": [
        "cli",
        "fs",
        "glue",
        "httr",
        "jsonlite",
        "rappdirs",
        "rlang",
        "rstudioapi",
        "withr"
      ]
    },
    "generics": {
      "Package": "generics",
      "Version": "0.1.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "15e9634c0fcd294799e9b2e929ed1b86",
      "Requirements": []
    },
    "ggplot2": {
      "Package": "ggplot2",
      "Version": "3.4.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "fd2aab12f54400c6bca43687231e246b",
      "Requirements": [
        "MASS",
        "cli",
        "glue",
        "gtable",
        "isoband",
        "lifecycle",
        "mgcv",
        "rlang",
        "scales",
        "tibble",
        "vctrs",
        "withr"
      ]
    },
    "glue": {
      "Package": "glue",
      "Version": "1.6.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "4f2596dfb05dac67b9dc558e5c6fba2e",
      "Requirements": []
    },
    "googledrive": {
      "Package": "googledrive",
      "Version": "2.0.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c3a25adbbfbb03f12e6f88c5fb1f3024",
      "Requirements": [
        "cli",
        "gargle",
        "glue",
        "httr",
        "jsonlite",
        "lifecycle",
        "magrittr",
        "pillar",
        "purrr",
        "rlang",
        "tibble",
        "uuid",
        "vctrs",
        "withr"
      ]
    },
    "googlesheets4": {
      "Package": "googlesheets4",
      "Version": "1.0.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "3b449d5292327880fc6cb61d0b2e9063",
      "Requirements": [
        "cellranger",
        "cli",
        "curl",
        "gargle",
        "glue",
        "googledrive",
        "httr",
        "ids",
        "magrittr",
        "purrr",
        "rematch2",
        "rlang",
        "tibble",
        "vctrs"
      ]
    },
    "gtable": {
      "Package": "gtable",
      "Version": "0.3.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "36b4265fb818f6a342bed217549cd896",
      "Requirements": []
    },
    "haven": {
      "Package": "haven",
      "Version": "2.5.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "5b45a553fca2217a07b6f9c843304c44",
      "Requirements": [
        "cli",
        "cpp11",
        "forcats",
        "hms",
        "lifecycle",
        "readr",
        "rlang",
        "tibble",
        "tidyselect",
        "vctrs"
      ]
    },
    "here": {
      "Package": "here",
      "Version": "1.0.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "24b224366f9c2e7534d2344d10d59211",
      "Requirements": [
        "rprojroot"
      ]
    },
    "highr": {
      "Package": "highr",
      "Version": "0.10",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "06230136b2d2b9ba5805e1963fa6e890",
      "Requirements": [
        "xfun"
      ]
    },
    "hms": {
      "Package": "hms",
      "Version": "1.1.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "41100392191e1244b887878b533eea91",
      "Requirements": [
        "ellipsis",
        "lifecycle",
        "pkgconfig",
        "rlang",
        "vctrs"
      ]
    },
    "htmltools": {
      "Package": "htmltools",
      "Version": "0.5.4",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "9d27e99cc90bd701c0a7a63e5923f9b7",
      "Requirements": [
        "base64enc",
        "digest",
        "ellipsis",
        "fastmap",
        "rlang"
      ]
    },
    "httr": {
      "Package": "httr",
      "Version": "1.4.4",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "57557fac46471f0dbbf44705cc6a5c8c",
      "Requirements": [
        "R6",
        "curl",
        "jsonlite",
        "mime",
        "openssl"
      ]
    },
    "ids": {
      "Package": "ids",
      "Version": "1.0.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "99df65cfef20e525ed38c3d2577f7190",
      "Requirements": [
        "openssl",
        "uuid"
      ]
    },
    "isoband": {
      "Package": "isoband",
      "Version": "0.2.7",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "0080607b4a1a7b28979aecef976d8bc2",
      "Requirements": []
    },
    "janitor": {
      "Package": "janitor",
      "Version": "2.1.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "6de84a8c67fb247e721166049c84695f",
      "Requirements": [
        "dplyr",
        "lifecycle",
        "lubridate",
        "magrittr",
        "purrr",
        "rlang",
        "snakecase",
        "stringi",
        "stringr",
        "tidyr",
        "tidyselect"
      ]
    },
    "jquerylib": {
      "Package": "jquerylib",
      "Version": "0.1.4",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "5aab57a3bd297eee1c1d862735972182",
      "Requirements": [
        "htmltools"
      ]
    },
    "jsonlite": {
      "Package": "jsonlite",
      "Version": "1.8.4",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "a4269a09a9b865579b2635c77e572374",
      "Requirements": []
    },
    "knitr": {
      "Package": "knitr",
      "Version": "1.41",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "6d4971f3610e75220534a1befe81bc92",
      "Requirements": [
        "evaluate",
        "highr",
        "stringr",
        "xfun",
        "yaml"
      ]
    },
    "labeling": {
      "Package": "labeling",
      "Version": "0.4.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "3d5108641f47470611a32d0bdf357a72",
      "Requirements": []
    },
    "lattice": {
      "Package": "lattice",
      "Version": "0.20-45",
      "Source": "Repository",
      "Repository": "CRAN",
      "Hash": "b64cdbb2b340437c4ee047a1f4c4377b",
      "Requirements": []
    },
    "lifecycle": {
      "Package": "lifecycle",
      "Version": "1.0.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "001cecbeac1cff9301bdc3775ee46a86",
      "Requirements": [
        "cli",
        "glue",
        "rlang"
      ]
    },
    "lubridate": {
      "Package": "lubridate",
      "Version": "1.9.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "2af4550c2f0f7fbe7cbbf3dbf4ea3902",
      "Requirements": [
        "generics",
        "timechange"
      ]
    },
    "magrittr": {
      "Package": "magrittr",
      "Version": "2.0.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "7ce2733a9826b3aeb1775d56fd305472",
      "Requirements": []
    },
    "memoise": {
      "Package": "memoise",
      "Version": "2.0.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "e2817ccf4a065c5d9d7f2cfbe7c1d78c",
      "Requirements": [
        "cachem",
        "rlang"
      ]
    },
    "mgcv": {
      "Package": "mgcv",
      "Version": "1.8-41",
      "Source": "Repository",
      "Repository": "CRAN",
      "Hash": "6b3904f13346742caa3e82dd0303d4ad",
      "Requirements": [
        "Matrix",
        "nlme"
      ]
    },
    "mime": {
      "Package": "mime",
      "Version": "0.12",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "18e9c28c1d3ca1560ce30658b22ce104",
      "Requirements": []
    },
    "modelr": {
      "Package": "modelr",
      "Version": "0.1.10",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "bc23cda9c6a8f91dc1c10e1994494711",
      "Requirements": [
        "broom",
        "magrittr",
        "purrr",
        "rlang",
        "tibble",
        "tidyr",
        "tidyselect",
        "vctrs"
      ]
    },
    "munsell": {
      "Package": "munsell",
      "Version": "0.5.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "6dfe8bf774944bd5595785e3229d8771",
      "Requirements": [
        "colorspace"
      ]
    },
    "nlme": {
      "Package": "nlme",
      "Version": "3.1-160",
      "Source": "Repository",
      "Repository": "CRAN",
      "Hash": "02e3c6e7df163aafa8477225e6827bc5",
      "Requirements": [
        "lattice"
      ]
    },
    "openssl": {
      "Package": "openssl",
      "Version": "2.0.5",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "b04c27110bf367b4daa93f34f3d58e75",
      "Requirements": [
        "askpass"
      ]
    },
    "pillar": {
      "Package": "pillar",
      "Version": "1.8.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "f2316df30902c81729ae9de95ad5a608",
      "Requirements": [
        "cli",
        "fansi",
        "glue",
        "lifecycle",
        "rlang",
        "utf8",
        "vctrs"
      ]
    },
    "pkgconfig": {
      "Package": "pkgconfig",
      "Version": "2.0.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "01f28d4278f15c76cddbea05899c5d6f",
      "Requirements": []
    },
    "prettyunits": {
      "Package": "prettyunits",
      "Version": "1.1.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "95ef9167b75dde9d2ccc3c7528393e7e",
      "Requirements": []
    },
    "processx": {
      "Package": "processx",
      "Version": "3.8.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "a33ee2d9bf07564efb888ad98410da84",
      "Requirements": [
        "R6",
        "ps"
      ]
    },
    "progress": {
      "Package": "progress",
      "Version": "1.2.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061",
      "Requirements": [
        "R6",
        "crayon",
        "hms",
        "prettyunits"
      ]
    },
    "ps": {
      "Package": "ps",
      "Version": "1.7.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "68dd03d98a5efd1eb3012436de45ba83",
      "Requirements": []
    },
    "purrr": {
      "Package": "purrr",
      "Version": "1.0.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "1ad491d27989ec6c26a2918ad6df116b",
      "Requirements": [
        "cli",
        "lifecycle",
        "magrittr",
        "rlang",
        "vctrs"
      ]
    },
    "rappdirs": {
      "Package": "rappdirs",
      "Version": "0.3.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "5e3c5dc0b071b21fa128676560dbe94d",
      "Requirements": []
    },
    "readr": {
      "Package": "readr",
      "Version": "2.1.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "2dfbfc673ccb3de3d8836b4b3bd23d14",
      "Requirements": [
        "R6",
        "cli",
        "clipr",
        "cpp11",
        "crayon",
        "hms",
        "lifecycle",
        "rlang",
        "tibble",
        "tzdb",
        "vroom"
      ]
    },
    "readxl": {
      "Package": "readxl",
      "Version": "1.4.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "5c1fbc365ac0a3fe7728ac79108b8e64",
      "Requirements": [
        "cellranger",
        "cpp11",
        "progress",
        "tibble"
      ]
    },
    "rematch": {
      "Package": "rematch",
      "Version": "1.0.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c66b930d20bb6d858cd18e1cebcfae5c",
      "Requirements": []
    },
    "rematch2": {
      "Package": "rematch2",
      "Version": "2.1.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "76c9e04c712a05848ae7a23d2f170a40",
      "Requirements": [
        "tibble"
      ]
    },
    "renv": {
      "Package": "renv",
      "Version": "0.16.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c9e8442ab69bc21c9697ecf856c1e6c7",
      "Requirements": []
    },
    "reprex": {
      "Package": "reprex",
      "Version": "2.0.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "d66fe009d4c20b7ab1927eb405db9ee2",
      "Requirements": [
        "callr",
        "cli",
        "clipr",
        "fs",
        "glue",
        "knitr",
        "lifecycle",
        "rlang",
        "rmarkdown",
        "rstudioapi",
        "withr"
      ]
    },
    "rlang": {
      "Package": "rlang",
      "Version": "1.0.6",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "4ed1f8336c8d52c3e750adcdc57228a7",
      "Requirements": []
    },
    "rmarkdown": {
      "Package": "rmarkdown",
      "Version": "2.19",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "4e29299e1f4c7eabb0b8365b338adf3c",
      "Requirements": [
        "bslib",
        "evaluate",
        "htmltools",
        "jquerylib",
        "jsonlite",
        "knitr",
        "stringr",
        "tinytex",
        "xfun",
        "yaml"
      ]
    },
    "rprojroot": {
      "Package": "rprojroot",
      "Version": "2.0.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "1de7ab598047a87bba48434ba35d497d",
      "Requirements": []
    },
    "rstudioapi": {
      "Package": "rstudioapi",
      "Version": "0.14",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "690bd2acc42a9166ce34845884459320",
      "Requirements": []
    },
    "rvest": {
      "Package": "rvest",
      "Version": "1.0.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "a4a5ac819a467808c60e36e92ddf195e",
      "Requirements": [
        "cli",
        "glue",
        "httr",
        "lifecycle",
        "magrittr",
        "rlang",
        "selectr",
        "tibble",
        "withr",
        "xml2"
      ]
    },
    "sass": {
      "Package": "sass",
      "Version": "0.4.4",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c76cbac7ca04ce82d8c38e29729987a3",
      "Requirements": [
        "R6",
        "fs",
        "htmltools",
        "rappdirs",
        "rlang"
      ]
    },
    "scales": {
      "Package": "scales",
      "Version": "1.2.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "906cb23d2f1c5680b8ce439b44c6fa63",
      "Requirements": [
        "R6",
        "RColorBrewer",
        "farver",
        "labeling",
        "lifecycle",
        "munsell",
        "rlang",
        "viridisLite"
      ]
    },
    "selectr": {
      "Package": "selectr",
      "Version": "0.4-2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "3838071b66e0c566d55cc26bd6e27bf4",
      "Requirements": [
        "R6",
        "stringr"
      ]
    },
    "snakecase": {
      "Package": "snakecase",
      "Version": "0.11.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "4079070fc210c7901c0832a3aeab894f",
      "Requirements": [
        "stringi",
        "stringr"
      ]
    },
    "stringi": {
      "Package": "stringi",
      "Version": "1.7.8",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "a68b980681bcbc84c7a67003fa796bfb",
      "Requirements": []
    },
    "stringr": {
      "Package": "stringr",
      "Version": "1.5.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "671a4d384ae9d32fc47a14e98bfa3dc8",
      "Requirements": [
        "cli",
        "glue",
        "lifecycle",
        "magrittr",
        "rlang",
        "stringi",
        "vctrs"
      ]
    },
    "sys": {
      "Package": "sys",
      "Version": "3.4.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "34c16f1ef796057bfa06d3f4ff818a5d",
      "Requirements": []
    },
    "tibble": {
      "Package": "tibble",
      "Version": "3.1.8",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "56b6934ef0f8c68225949a8672fe1a8f",
      "Requirements": [
        "fansi",
        "lifecycle",
        "magrittr",
        "pillar",
        "pkgconfig",
        "rlang",
        "vctrs"
      ]
    },
    "tidyr": {
      "Package": "tidyr",
      "Version": "1.2.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "cdb403db0de33ccd1b6f53b83736efa8",
      "Requirements": [
        "cpp11",
        "dplyr",
        "ellipsis",
        "glue",
        "lifecycle",
        "magrittr",
        "purrr",
        "rlang",
        "tibble",
        "tidyselect",
        "vctrs"
      ]
    },
    "tidyselect": {
      "Package": "tidyselect",
      "Version": "1.2.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "79540e5fcd9e0435af547d885f184fd5",
      "Requirements": [
        "cli",
        "glue",
        "lifecycle",
        "rlang",
        "vctrs",
        "withr"
      ]
    },
    "tidyverse": {
      "Package": "tidyverse",
      "Version": "1.3.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "972389aea7fa1a34739054a810d0c6f6",
      "Requirements": [
        "broom",
        "cli",
        "crayon",
        "dbplyr",
        "dplyr",
        "dtplyr",
        "forcats",
        "ggplot2",
        "googledrive",
        "googlesheets4",
        "haven",
        "hms",
        "httr",
        "jsonlite",
        "lubridate",
        "magrittr",
        "modelr",
        "pillar",
        "purrr",
        "readr",
        "readxl",
        "reprex",
        "rlang",
        "rstudioapi",
        "rvest",
        "stringr",
        "tibble",
        "tidyr",
        "xml2"
      ]
    },
    "timechange": {
      "Package": "timechange",
      "Version": "0.1.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "4657195cc632097bb8d140d626b519fb",
      "Requirements": [
        "cpp11"
      ]
    },
    "tinytex": {
      "Package": "tinytex",
      "Version": "0.43",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "facc02f3d63ed7dd765513c004c394ce",
      "Requirements": [
        "xfun"
      ]
    },
    "tzdb": {
      "Package": "tzdb",
      "Version": "0.3.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "b2e1cbce7c903eaf23ec05c58e59fb5e",
      "Requirements": [
        "cpp11"
      ]
    },
    "utf8": {
      "Package": "utf8",
      "Version": "1.2.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c9c462b759a5cc844ae25b5942654d13",
      "Requirements": []
    },
    "uuid": {
      "Package": "uuid",
      "Version": "1.1-0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "f1cb46c157d080b729159d407be83496",
      "Requirements": []
    },
    "vctrs": {
      "Package": "vctrs",
      "Version": "0.5.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "970324f6572b4fd81db507b5d4062cb0",
      "Requirements": [
        "cli",
        "glue",
        "lifecycle",
        "rlang"
      ]
    },
    "viridisLite": {
      "Package": "viridisLite",
      "Version": "0.4.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "62f4b5da3e08d8e5bcba6cac15603f70",
      "Requirements": []
    },
    "vroom": {
      "Package": "vroom",
      "Version": "1.6.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "64f81fdead6e0d250fb041e175d123ab",
      "Requirements": [
        "bit64",
        "cli",
        "cpp11",
        "crayon",
        "glue",
        "hms",
        "lifecycle",
        "progress",
        "rlang",
        "tibble",
        "tidyselect",
        "tzdb",
        "vctrs",
        "withr"
      ]
    },
    "withr": {
      "Package": "withr",
      "Version": "2.5.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c0e49a9760983e81e55cdd9be92e7182",
      "Requirements": []
    },
    "xfun": {
      "Package": "xfun",
      "Version": "0.36",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "f5baec54606751aa53ac9c0e05848ed6",
      "Requirements": []
    },
    "xml2": {
      "Package": "xml2",
      "Version": "1.3.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "40682ed6a969ea5abfd351eb67833adc",
      "Requirements": []
    },
    "yaml": {
      "Package": "yaml",
      "Version": "2.3.6",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "9b570515751dcbae610f29885e025b41",
      "Requirements": []
    }
  }
}

But Dropbox might ruin…

(Advanced) renv with Cloud Storage

(Advanced) Docker


renv can only solve problems caused by package versions. Other problems may come from differences in

  • R versions ⇒ Always use the latest version of R
  • Non-R dependencies (e.g., geospatial packages) ⇒ Docker can solve
  • OS (only Windows binary produces bugs…) ⇒ Docker can solve


Docker

  • A virtual machine. You write a blueprint (Dockerfile) that specifies the OS (Linux), the applications (R and others), and the packages
  • If you work on Docker, others can perfectly replicate your environment

Handson 1


  1. Clone (or download) the course repository
  2. Open the course project (workshop-r-2022.Rproj)
  3. Run renv::restore() in R console
  4. Confirm you can render sample notebooks without any problem


Warning

Please make sure you are using the latest R version, 4.2.2 (2022-10-31).

Cleaning Strategy

The Fundamental Theorem of Readability

Code should be written to minimize the time it would take for someone else to understand it.


For economists,

\[ \text{Code} := \arg\min_{c \in \mathcal{C}}\mathbb{E}_i[R_{i}(c)] \]

where

  • \(\mathcal{C}\): Set of codes that work
  • \(i\): A potential reader including yourself at a different time point
  • \(R_{i}(c)\): Time taken by person \(i\) to understand code \(c\)

Naming


For readability, you need to name variables informatively and non-misleadingly

🙆 Good 🙅 Bad
Bool is_female, has_kids female, no_kids
Category industry8, emp3 industry, emp_status
Bins age_bin5, wage_bin10 age, wage

Naming


For readability, you need to name variables informatively and non-misleadingly

🙆 Good 🙅 Bad
Bool is_female, has_kids female, no_kids
Category industry8, emp3 industry, emp_status
Bins age_bin5, wage_bin10 age, wage


Boolean

  • is_*, has_*, should_* indicates the type boolean.
  • Starting with not_*/no_* adds an extra step of recognition for the reader

Naming


For readability, you need to name variables informatively and non-misleadingly

🙆 Good 🙅 Bad
Bool is_female, has_kids female, no_kids
Category industry8, emp3 industry, emp_status
Bins age_bin5, wage_bin10 age, wage


Categorical

  • The attached number indicates that the variable is categorical and how many categories it has

Naming


For readability, you need to name variables informatively and non-misleadingly

🙆 Good 🙅 Bad
Bool is_female, has_kids female, no_kids
Category industry8, emp3 industry, emp_status
Bins age_bin5, wage_bin10 age, wage


Bins of continuous variables

  • Avoid confusion with the original continuous variable
  • The attached number shows the width of the bin

Rename at Once

# Read the semicolon-delimited raw file without printing the column types
raw <- read_delim(
  here("data/raw/accident_bike/year=2022/file.txt"),
  delim = ";",
  show_col_types = FALSE
)
Rows: 42,547
Columns: 5
$ num_expediente <dbl> 2.022e+04, 2.022e+04, 2.022e+05, 2.022e+05, 2.022e+05, …
$ fecha          <chr> "01/01/2022", "01/01/2022", "01/01/2022", "01/01/2022",…
$ hora           <time> 01:30:00, 01:30:00, 00:30:00, 00:30:00, 00:30:00, 01:5…
$ localizacion   <chr> "AVDA. ALBUFERA, 19", "AVDA. ALBUFERA, 19", "PLAZA. CAN…
$ numero         <chr> "19", "19", "2", "2", "2", "53", "53", "728", "728", "+…


# Translation table with one row per column: `spanish` holds the original
# column name and `english` the name it should be renamed to.
code <- read_csv(
  here("data/translate/accident_bike.csv"),
  show_col_types = FALSE
)

# Named lookup vector for rename(): names are the new (English) names,
# values are the existing (Spanish) names.
lookup <- setNames(code$spanish, code$english)

# rename_at() is superseded; rename() + all_of() does the same renaming and
# still errors if a listed column is missing from `raw`.
renamed <- raw |>
  rename(all_of(lookup))
Rows: 42,547
Columns: 5
$ id_1922    <dbl> 2.022e+04, 2.022e+04, 2.022e+05, 2.022e+05, 2.022e+05, 2.02…
$ date       <chr> "01/01/2022", "01/01/2022", "01/01/2022", "01/01/2022", "01…
$ hms        <time> 01:30:00, 01:30:00, 00:30:00, 00:30:00, 00:30:00, 01:50:00…
$ street     <chr> "AVDA. ALBUFERA, 19", "AVDA. ALBUFERA, 19", "PLAZA. CANOVAS…
$ num_street <chr> "19", "19", "2", "2", "2", "53", "53", "728", "728", "+0050…
spanish english
num_expediente id_1922
fecha date
hora hms
localizacion street
numero num_street
cod_distrito code_district
distrito district
tipo_accidente type_accident
estado_meteorológico weather
tipo_vehiculo type_vehicle
tipo_persona type_person
rango_edad age_c
sexo gender
cod_lesividad code_injury8
lesividad injury8
coordenada_x_utm coord_x
coordenada_y_utm coord_y
positiva_alcohol positive_alcohol
positiva_droga positive_drug

Type

Factorizing

Format: Parquet

(Advanced) Parquet with Large Dataset

Cleaning Workflow

Handson 2

Some examples of cleaning

Tips in Plots

Layers in ggplots

Scales

Color Palette

Themes

Third-party Themes

My Favorite ggplot2 Family

Automated Table Creation

kableExtra: Example

tab
# A tibble: 6 × 9
# Groups:   weather [6]
  weather   n_men_2019 n_men_2…¹ n_men…² n_men…³ n_wom…⁴ n_wom…⁵ n_wom…⁶ n_wom…⁷
  <fct>          <int>     <int>   <int>   <int>   <int>   <int>   <int>   <int>
1 sunny          24399     14969   19208   19420   11971    6958    9417    9298
2 cloud           1159      1190    1325    1633     555     554     630     774
3 soft rain       2126      1198    1281    1408    1068     542     605     716
4 hard rain        386       202     386     352     222      96     210     179
5 snow               2         2     124       5      NA      NA      38       1
6 hail              11         5       6       4       3       3       1       2
# … with abbreviated variable names ¹​n_men_2020, ²​n_men_2021, ³​n_men_2022,
#   ⁴​n_women_2019, ⁵​n_women_2020, ⁶​n_women_2021, ⁷​n_women_2022
library(kableExtra)

# Render missing values as empty cells rather than "NA"
options(knitr.kable.NA = "")

# Build the LaTeX table: year labels as column names, a Men/Women header
# spanning four columns each, and the rows packed into two labelled groups.
ktb <- tab |>
  kbl(
    format = "latex",
    booktabs = TRUE,
    col.names = c(" ", 2019:2022, 2019:2022)
  ) |>
  add_header_above(c(" ", "Men" = 4, "Women" = 4)) |>
  pack_rows(index = c("Good" = 2, "Bad" = 4))

# Write the table to a .tex file inside the project
save_kable(ktb, here("output/tex/kableextra/tb_accident_bike.tex"))

  • booktabs = TRUE for booktabs package in LaTeX
  • You can specify the column names by col.names
  • You can pack columns and rows by add_header_above() and pack_rows()
  • save_kable() saves in a tex file if the file name ends with “.tex”

kableExtra

Dataframe (tibble) to Table

  • Create a tibble table by dplyr::group_by & dplyr::summarize and janitor::tabyl()
  • For regression tables, you can use modelsummary (next slide)

Pack Columns and Rows

  • As far as I know, Python, Julia, and Stata do not allow us to pack them easily

More Complicated Tables

  • You can refer to Hao Zhu’s document
  • If a table contains a mathematical expression, use escape = FALSE. See a discussion on Stack Overflow

modelsummary

Given the following regression results,


library(fixest) # fast estimation of models with fixed effects

# Six logit models: (1)-(3) use is_hospitalized as the outcome and (4)-(6)
# use is_died. The terms after `|` are fixed effects, added step by step.
models <- list(
  "(1)" = feglm(
    is_hospitalized ~ type_person + positive_alcohol + positive_drug |
      age_c + gender,
    family = binomial(logit),
    data = data
  ),
  "(2)" = feglm(
    is_hospitalized ~ type_person + positive_alcohol + positive_drug |
      age_c + gender + type_vehicle,
    family = binomial(logit),
    data = data
  ),
  "(3)" = feglm(
    is_hospitalized ~ type_person + positive_alcohol + positive_drug |
      age_c + gender + type_vehicle + weather,
    family = binomial(logit),
    data = data
  ),
  "(4)" = feglm(
    is_died ~ type_person + positive_alcohol + positive_drug |
      age_c + gender,
    family = binomial(logit),
    data = data
  ),
  "(5)" = feglm(
    is_died ~ type_person + positive_alcohol + positive_drug |
      age_c + gender + type_vehicle,
    family = binomial(logit),
    data = data
  ),
  "(6)" = feglm(
    is_died ~ type_person + positive_alcohol + positive_drug |
      age_c + gender + type_vehicle + weather,
    family = binomial(logit),
    data = data
  )
)

modelsummary: Init

# Default table, before any customization of coefficient or statistic rows
modelsummary(models)
(1) (2) (3) (4) (5) (6)
type_personpassenger 0.049 0.530 0.507 −1.781 −1.575 −1.565
(0.104) (0.071) (0.070) (0.759) (0.783) (0.784)
type_personpedestrian 2.124 2.402 2.323 2.280 2.418 2.422
(0.115) (0.066) (0.064) (0.301) (0.287) (0.285)
positive_alcoholTRUE −0.077 0.310 0.353 −13.710 −13.455 −13.492
(0.088) (0.095) (0.093) (0.053) (0.064) (0.063)
Num.Obs. 149918 149831 134006 90852 89300 86330
R2 0.055 0.171 0.165 0.107 0.145 0.148
R2 Adj. 0.054 0.170 0.163 0.086 0.113 0.112
R2 Within 0.047 0.054 0.052 0.073 0.076 0.076
R2 Within Adj. 0.047 0.054 0.052 0.070 0.072 0.073
AIC 62871.0 55210.6 53565.4 1601.9 1552.2 1534.5
BIC 63079.3 55696.5 54085.1 1780.8 1824.8 1834.2
RMSE 0.23 0.22 0.23 0.04 0.04 0.04
Std.Errors by: age_c by: age_c by: age_c by: age_c by: age_c by: age_c
FE: age_c X X X X X X
FE: gender X X X X X X
FE: type_vehicle X X X X
FE: weather X X

modelsummary: Modify Coefficients

# Map raw coefficient names to the labels shown in the table
cm <- c(
  "type_personpassenger" = "Passenger",
  "type_personpedestrian" = "Pedestrian",
  "positive_alcoholTRUE" = "Positive Alcohol"
)

modelsummary(models, coef_map = cm)
(1) (2) (3) (4) (5) (6)
Passenger 0.049 0.530 0.507 −1.781 −1.575 −1.565
(0.104) (0.071) (0.070) (0.759) (0.783) (0.784)
Pedestrian 2.124 2.402 2.323 2.280 2.418 2.422
(0.115) (0.066) (0.064) (0.301) (0.287) (0.285)
Positive Alcohol −0.077 0.310 0.353 −13.710 −13.455 −13.492
(0.088) (0.095) (0.093) (0.053) (0.064) (0.063)
Num.Obs. 149918 149831 134006 90852 89300 86330
R2 0.055 0.171 0.165 0.107 0.145 0.148
R2 Adj. 0.054 0.170 0.163 0.086 0.113 0.112
R2 Within 0.047 0.054 0.052 0.073 0.076 0.076
R2 Within Adj. 0.047 0.054 0.052 0.070 0.072 0.073
AIC 62871.0 55210.6 53565.4 1601.9 1552.2 1534.5
BIC 63079.3 55696.5 54085.1 1780.8 1824.8 1834.2
RMSE 0.23 0.22 0.23 0.04 0.04 0.04
Std.Errors by: age_c by: age_c by: age_c by: age_c by: age_c by: age_c
FE: age_c X X X X X X
FE: gender X X X X X X
FE: type_vehicle X X X X
FE: weather X X

modelsummary: Modify Statistics

# Map raw coefficient names to the labels shown in the table
cm <- c(
  "type_personpassenger" = "Passenger",
  "type_personpedestrian" = "Pedestrian",
  "positive_alcoholTRUE" = "Positive Alcohol"
)

# Statistic rows to show: raw name, label to display, and its format
gm <- tibble(
  raw = c(
    "nobs",
    "FE: age_c",
    "FE: gender",
    "FE: type_vehicle",
    "FE: weather"
  ),
  clean = c(
    "Observations",
    "FE: Age Group",
    "FE: Gender",
    "FE: Type of Vehicle",
    "FE: Weather"
  ),
  fmt = c(0, 0, 0, 0, 0)
)

modelsummary(models, coef_map = cm, gof_map = gm)
(1) (2) (3) (4) (5) (6)
Passenger 0.049 0.530 0.507 −1.781 −1.575 −1.565
(0.104) (0.071) (0.070) (0.759) (0.783) (0.784)
Pedestrian 2.124 2.402 2.323 2.280 2.418 2.422
(0.115) (0.066) (0.064) (0.301) (0.287) (0.285)
Positive Alcohol −0.077 0.310 0.353 −13.710 −13.455 −13.492
(0.088) (0.095) (0.093) (0.053) (0.064) (0.063)
Observations 149918 149831 134006 90852 89300 86330
FE: Age Group X X X X X X
FE: Gender X X X X X X
FE: Type of Vehicle X X X X
FE: Weather X X

modelsummary: Decorate

# Map raw coefficient names to the labels shown in the table
cm <- c(
  "type_personpassenger" = "Passenger",
  "type_personpedestrian" = "Pedestrian",
  "positive_alcoholTRUE" = "Positive Alcohol"
)

# Statistic rows to show: raw name, label to display, and its format
gm <- tibble(
  raw = c(
    "nobs",
    "FE: age_c",
    "FE: gender",
    "FE: type_vehicle",
    "FE: weather"
  ),
  clean = c(
    "Observations",
    "FE: Age Group",
    "FE: Gender",
    "FE: Type of Vehicle",
    "FE: Weather"
  ),
  fmt = c(0, 0, 0, 0, 0)
)

# Add significance stars, then a header that groups the columns by outcome
modelsummary(
  models,
  stars = c("+" = .1, "*" = .05, "**" = .01),
  coef_map = cm,
  gof_map = gm
) |>
  add_header_above(c(" ", "Hospitalization" = 3, "Died within 24 hours" = 3))
Hospitalization
Died within 24 hours
(1) (2) (3) (4) (5) (6)
Passenger 0.049 0.530** 0.507** −1.781* −1.575+ −1.565+
(0.104) (0.071) (0.070) (0.759) (0.783) (0.784)
Pedestrian 2.124** 2.402** 2.323** 2.280** 2.418** 2.422**
(0.115) (0.066) (0.064) (0.301) (0.287) (0.285)
Positive Alcohol −0.077 0.310** 0.353** −13.710** −13.455** −13.492**
(0.088) (0.095) (0.093) (0.053) (0.064) (0.063)
Observations 149918 149831 134006 90852 89300 86330
FE: Age Group X X X X X X
FE: Gender X X X X X X
FE: Type of Vehicle X X X X
FE: Weather X X
+ p < 0.1, * p < 0.05, ** p < 0.01

modelsummary: Export to \(\LaTeX\)

# Map raw coefficient names to the labels shown in the table
cm <- c(
  "type_personpassenger" = "Passenger",
  "type_personpedestrian" = "Pedestrian",
  "positive_alcoholTRUE" = "Positive Alcohol"
)

# Statistic rows to show: raw name, label to display, and its format
gm <- tibble(
  raw = c(
    "nobs",
    "FE: age_c",
    "FE: gender",
    "FE: type_vehicle",
    "FE: weather"
  ),
  clean = c(
    "Observations",
    "FE: Age Group",
    "FE: Gender",
    "FE: Type of Vehicle",
    "FE: Weather"
  ),
  fmt = c(0, 0, 0, 0, 0)
)

# Produce the table as LaTeX, add the outcome header and a rule after
# row 7, then write it to a .tex file inside the project.
modelsummary(
  models,
  output = "latex_tabular",
  stars = c("+" = .1, "*" = .05, "**" = .01),
  coef_map = cm,
  gof_map = gm
) |>
  add_header_above(
    c(" ", "Hospitalization" = 3, "Died within 24 hours" = 3)
  ) |>
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned
  row_spec(7, hline_after = TRUE) |>
  save_kable(here("output/tex/modelsummary/reg_accident_bike.tex"))

Handson

Quarto

What Is Quarto?

Markdown ⇔ \(\LaTeX\)

Interactive Plots

Quarto Presentation